All external data files used in this code are available at https://github.com/MoTrPAC/motrpac-rat-training-mitochondria/tree/main/data
This script assumes that these files were downloaded to the data_dir path specified above
# read the mtDNA data file
mt_data = read.csv(
file=paste0(data_dir,"mtDNA_motrpac_pass1b.csv"),
stringsAsFactors = F
)
mt_data$bid = mt_data$Label
mt_data$Tissue[mt_data$Tissue=="AORTA"] = "VENACV"
# read the RNA-seq metrics to obtain mtRNA read percentages
rnaseq_qa_qc = TRNSCRPT_META
pct_mito_reads = rnaseq_qa_qc$pct_chrM
names(pct_mito_reads) = as.character(rnaseq_qa_qc$vial_label)
rownames(rnaseq_qa_qc) = as.character(rnaseq_qa_qc$vial_label)
colnames(rnaseq_qa_qc) = tolower(colnames(rnaseq_qa_qc))
# load the pheno data
pheno = PHENO
cardiolipins_dir = paste0(data_dir,"/cardiolipins/")
cl_files = list.files(cardiolipins_dir,full.names = T)
cl_files = cl_files[!grepl("name_mapping",cl_files)]
cl_data = c()
cl_fdata = c()
for(f in cl_files){
f_arr = strsplit(f,split="_")[[1]]
tissue = f_arr[length(f_arr)]
tissue = gsub(".csv","",tissue)
tissue = toupper(tissue)
if(tissue=="MUSCLE"){
tissue = "SKMGN"
}
if(tissue=="hippocampus"){
tissue = "HIPPOC"
}
d = fread(f,stringsAsFactors = F,data.table = F,header=T)
d = d[!is.na(d[,1]),]
if(ncol(d)<51){next} # ignore inomplete tissues
# use bid instead of vial labels
colnames(d)[-1] = substring(colnames(d)[-1],1,5)
feature_d = cbind(tissue,d[,1])
if(length(cl_data)==0){
cl_data = d
cl_fdata = feature_d
}else{
cl_data = rbind(cl_data,d[,colnames(cl_data)])
cl_fdata = rbind(cl_fdata,feature_d)
}
}
cl_data = cl_data[,-1]
colnames(cl_fdata) = c("tissue","feature_ID")
cl_fdata = data.frame(cl_fdata,stringsAsFactors = F)
# transform WATSC and SKMGN to WAT-SC and SKM-GN
# to be consistent with the pheno data and the other
# data objects:
cl_fdata$tissue[cl_fdata$tissue=="SKMGN"] = "SKM-GN"
cl_fdata$tissue[cl_fdata$tissue=="WATSC"] = "WAT-SC"
cl_fdata$tissue[cl_fdata$tissue=="WAT"] = "WAT-SC"
table(cl_fdata$tissue)
##
## HEART KIDNEY LIVER LUNG SKM-GN WAT-SC
## 11 16 9 3 11 4
sort(table(cl_fdata$feature_ID),decreasing=T)[1:4]
##
## CL(72:8)>CL(18:2/18:2/18:2/18:2) CL(74:11)
## 4 4
## CL(74:9) CL(70:7)
## 4 3
cl_fdata[cl_fdata$feature_ID=="CL(72:8)",]
## tissue feature_ID
## 43 SKM-GN CL(72:8)
## 53 WAT-SC CL(72:8)
selected_cls = unique(cl_fdata$feature_ID)
# # Optional: get cardiolipins from the main landscape paper:
# sample_data = METAB_NORM_DATA_FLAT
# sample_data_featureinfo = sample_data[,1:5]
# # Limit the data to cardiolipins
# cl_inds = grepl("^CL",sample_data$feature_ID,perl = T)
# cl_data = sample_data[cl_inds,]
# cl_fdata = sample_data_featureinfo[cl_inds,]
# # change column names to bids
# m = unique(pheno[,c("pid","bid")])
# p2b = as.character(m[,2])
# names(p2b) = as.character(m[,1])
# colnames(cl_data)[-c(1:5)] = unname(p2b[colnames(cl_data)[-c(1:5)]])
# Add mito gene annotations
mt = fread(paste0(data_dir,
"external-datasets_gene-sets_Mitocarta_Rat.MitoCarta3.0.genes_only.txt"),
stringsAsFactors = F,data.table = F)
# additional pathway enrichment with mitocarta pathways
mt_pw = fread(paste0(data_dir,"Human.MitoCarta3.0.txt"),header = T,
stringsAsFactors = F,data.table = F)
# map transcripts to genes using biomart
library(biomaRt)
mart_rat = useMart("ensembl", dataset = "rnorvegicus_gene_ensembl")
attrs = c("ensembl_gene_id","ensembl_peptide_id","rgd_symbol")
rat_prot_gene = getBM(attributes = attrs, mart = mart_rat, uniqueRows=T)
rat_prot_gene = rat_prot_gene[rat_prot_gene$ensembl_peptide_id!="",]
rat_prot_gene = rat_prot_gene[rat_prot_gene$rgd_symbol != "",]
# read in rat to human mapping
rat_to_human = fread(paste0(data_dir,"RGD_ORTHOLOGS_20201001.txt"),
stringsAsFactors = F,data.table = F)
rat_to_human = rat_to_human[!is.na(rat_to_human$HUMAN_ORTHOLOG_SYMBOL),]
rat_to_human_map = unique(rat_to_human[,c("RAT_GENE_SYMBOL", "HUMAN_ORTHOLOG_SYMBOL")])
# make a pathway:member list
mitocarta_pathways = list()
for(pw in mt_pw$MitoPathway){
members = unname(unlist(strsplit(mt_pw[mt_pw$MitoPathway==pw, "Genes"], ', ')))
rat_members = rat_to_human_map[rat_to_human_map$HUMAN_ORTHOLOG_SYMBOL %in% members, 1]
rat_members = unique(na.omit(rat_members))
mitocarta_pathways[[pw]] = rat_members
#print(paste(pw, length(members), length(rat_members)))
}
save(rat_prot_gene,
mt,mt_pw,mitocarta_pathways,rat_to_human,
file=paste0(data_dir,"mito_gene_annotation.RData"))
Load above data:
load(paste0(data_dir,"mito_gene_annotation.RData"))
control_vials = rownames(pheno)[pheno$intervention=="control"]
control_vials = intersect(control_vials,names(pct_mito_reads))
baseline_pct_mito = data.frame(
pct_mito_reads = pct_mito_reads[control_vials],
tissue = pheno[control_vials,"tissue"],
sex = pheno[control_vials,"sex"]
)
baseline_pct_mito_male = baseline_pct_mito[baseline_pct_mito$sex == "male",]
baseline_pct_mito_female = baseline_pct_mito[baseline_pct_mito$sex == "female",]
currdf = baseline_pct_mito_male
tissue_med = tapply(currdf$pct_mito_reads,currdf$tissue,median)
tissue_med = sort(tissue_med,decreasing = T)
currdf$color = TISSUE_COLORS[currdf$tissue]
currdf$tissue = factor(currdf$tissue,levels = names(tissue_med))
currdf_male = currdf
currdf_male$sex = "Male"
currdf = baseline_pct_mito_female
tissue_med = tapply(currdf$pct_mito_reads,currdf$tissue,median)
tissue_med = sort(tissue_med,decreasing = T)
currdf$color = TISSUE_COLORS[currdf$tissue]
currdf$tissue = factor(currdf$tissue,levels = names(tissue_med))
currdf_female = currdf
currdf_female$sex = "Female"
baseline_pct_mito = rbind(currdf_male,currdf_female)
ggplot(baseline_pct_mito, aes(x=tissue, y=pct_mito_reads,color = as.character(tissue))) +
geom_boxplot() + facet_wrap(~sex) + xlab("") +
theme(axis.text.x=element_blank(),axis.ticks.x=element_blank()) +
scale_color_manual(
labels = sort(as.character(unique(baseline_pct_mito$tissue))),
values = TISSUE_COLORS[sort(as.character(unique(baseline_pct_mito$tissue)))])
mt_dna_copy = mt_data[mt_data$Group=="Ctrl",]
mt_dna_copy$sex = ifelse(mt_dna_copy$sex == "M","Male","Female")
tissue_med = tapply(mt_dna_copy$Level,mt_dna_copy$Tissue,median)
tissue_med = sort(tissue_med,decreasing = T)
mt_dna_copy$Tissue = factor(mt_dna_copy$Tissue,levels = names(tissue_med))
ggplot(mt_dna_copy, aes(x=Tissue, y=Level,color=as.character(Tissue))) +
geom_boxplot() + facet_wrap(~sex) + xlab("") +
theme(axis.text.x=element_blank(),axis.ticks.x=element_blank()) +
scale_color_manual(
labels = sort(as.character(unique(mt_dna_copy$Tissue))),
values = TISSUE_COLORS[sort(as.character(unique(mt_dna_copy$Tissue)))])
# Get data frames with all mito biomarkers, by tissue
mt_biomarkers = list()
for(tissue_name in unique(pheno$tissue)){
if(is.na(tissue_name)){next}
if(grepl("VENA",tissue_name)){next}
print(paste("Analyzing data from:",tissue_name))
# get all vial labels
tissue_vials = rownames(pheno)[pheno$tissue == tissue_name]
# limit to vial labels that appear in the rna-seq data
tissue_vials = intersect(names(pct_mito_reads),tissue_vials)
tissue_vials = setdiff(tissue_vials,OUTLIERS$viallabel)
if(length(tissue_vials)<5){next}
# get the bids
tissue_bids = sapply(tissue_vials,substring,1,5)
# save results in a data frame
tissue_df = data.frame(
mt_reads = pct_mito_reads[tissue_vials],
viallabel = tissue_vials,
bid = tissue_bids
)
tissue_df$mtdna = NA
if(sum(mt_data$Tissue==tissue_name)>0){
currmt_data = mt_data[mt_data$Tissue==tissue_name,]
rownames(currmt_data) = as.character(currmt_data$bid)
curr_shared_bids = intersect(tissue_df$bid,rownames(currmt_data))
for(bid in curr_shared_bids){
tissue_df[tissue_df$bid==bid,"mtdna"] = currmt_data[bid,"Level"]
}
tissue_df$mtdna = log2(tissue_df$mtdna)
}
tissue_df$clPC1 = NA
tissue_df$clPC2 = NA
tissue_df$clPC3 = NA
for(cl in selected_cls){tissue_df[[cl]] = NA}
curr_cl_data = NULL
if(sum(cl_fdata$tissue == tissue_name)>0){
curr_shared_bids = intersect(tissue_df$bid,colnames(cl_data))
curr_cl_data = cl_data[cl_fdata$tissue == tissue_name,curr_shared_bids]
metab_names = cl_fdata$feature_ID[cl_fdata$tissue == tissue_name]
curr_cl_data = aggregate(curr_cl_data,by=list(metab_names),mean,na.rm=T)
rownames(curr_cl_data) = curr_cl_data[,1]
curr_cl_data = curr_cl_data[,-1]
# remove columns that are all NAs
bid_na_rate = colSums(is.na(curr_cl_data)) / nrow(curr_cl_data)
curr_cl_data = curr_cl_data[,bid_na_rate<1]
curr_shared_bids = colnames(curr_cl_data)
# check missing values
na_rate = sum(is.na(curr_cl_data))/(nrow(curr_cl_data)*ncol(curr_cl_data))
if(na_rate >= 0.05){
curr_cl_data = NULL
}
else{
# use min imputation
curr_cl_data = t(curr_cl_data)
curr_cl_data[is.nan(curr_cl_data)] = NA
curr_cl_data[is.na(curr_cl_data)] = min(curr_cl_data,na.rm=T)
}
if(!is.null(curr_cl_data) && nrow(curr_cl_data)>0){
# log2 transform
curr_cl_data = log2(curr_cl_data)
cl_pca = prcomp(curr_cl_data)
num_cols = min(3,ncol(curr_cl_data))
curr_cl_data_raw = curr_cl_data
curr_cl_data = as.matrix(cl_pca$x[,1:num_cols],ncol=num_cols)
colnames(curr_cl_data) = paste0("clPC",1:num_cols)
for(pc in colnames(curr_cl_data)){
for(bid in curr_shared_bids){
tissue_df[tissue_df$bid==bid,pc] = curr_cl_data[bid,pc]
}
}
# Add selected CLs
for(cl in selected_cls){
if(!cl %in% colnames(curr_cl_data_raw)){next}
for(bid in curr_shared_bids){
tissue_df[tissue_df$bid==bid,cl] = curr_cl_data_raw[bid,cl]
}
}
}
}
# add sex and group, save the df
tissue_df$group = pheno[tissue_df$viallabel,"group"]
tissue_df$sex = pheno[tissue_df$viallabel,"sex"]
mt_biomarkers[[gsub("-","",tissue_name)]] = tissue_df
}
## [1] "Analyzing data from: HYPOTH"
## [1] "Analyzing data from: CORTEX"
## [1] "Analyzing data from: HIPPOC"
## [1] "Analyzing data from: ADRNL"
## [1] "Analyzing data from: LUNG"
## [1] "Analyzing data from: HEART"
## [1] "Analyzing data from: LIVER"
## [1] "Analyzing data from: SKM-GN"
## [1] "Analyzing data from: WAT-SC"
## [1] "Analyzing data from: PLASMA"
## [1] "Analyzing data from: SMLINT"
## [1] "Analyzing data from: OVARY"
## [1] "Analyzing data from: COLON"
## [1] "Analyzing data from: SPLEEN"
## [1] "Analyzing data from: SKM-VL"
## [1] "Analyzing data from: BLOOD"
## [1] "Analyzing data from: BAT"
## [1] "Analyzing data from: KIDNEY"
## [1] "Analyzing data from: TESTES"
#' Helper function: compute corr matrix from data with NAs
get_corrmat<-function(x){
n = ncol(x)
m = matrix(NA,ncol=n,nrow=n,dimnames=list(colnames(x),colnames(x)))
for(i in 1:n){
inds1 = !is.na(x[,i])
if(sum(inds1)<3){next}
for(j in 1:i){
inds2 = !is.na(x[,j])
if(sum(inds2)<3){next}
inds = inds1 & inds2
m[i,j] = cor(as.numeric(x[inds,i]),as.numeric(x[inds,j]))
m[j,i] = m[i,j]
}
}
return(m)
}
#' Helper function for the statistics
library(multcomp)
## Loading required package: mvtnorm
## Loading required package: survival
## Loading required package: TH.data
## Loading required package: MASS
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
## The following object is masked from 'package:biomaRt':
##
## select
## The following object is masked from 'package:AnnotationDbi':
##
## select
##
## Attaching package: 'TH.data'
## The following object is masked from 'package:MASS':
##
## geyser
compute_biomarker_dea_res<-function(x,group){
kw_p = kruskal.test(x,group)$p.value
df = data.frame(x=x,group=group)
m = lm(x~0+group,data=df)
coeffs = coefficients(m)
curr_groups = gsub("group","",names(coeffs))
curr_groups = sort(curr_groups[!grepl("control",curr_groups)])
cons = matrix(0,nrow=length(curr_groups),ncol=length(coeffs),
dimnames = list(curr_groups,names(coeffs)))
cons[,which(grepl("control",colnames(cons)))] = -1
for(r in rownames(cons)){cons[r,grepl(r,colnames(cons))]=1}
cons_res = glht(m, linfct = cons)
cons_res = summary(cons_res)
cons_coeffs = cons_res$test$coefficients
cons_tstats = cons_res$test$tstat
cons_names = names(cons_coeffs)
cons_ps = as.numeric(cons_res$test$pvalues)
names(cons_coeffs) = paste0("fc_",cons_names)
names(cons_ps) = paste0("pval_",cons_names)
names(cons_tstats) = paste0("tstat_",cons_names)
v = rep(NA,13)
names(v) = c("kw_p",paste0("fc_",c("1w","2w","4w","8w")),
paste0("pval_",c("1w","2w","4w","8w")),
paste0("tstat_",c("1w","2w","4w","8w")))
v["kw_p"] = kw_p
v[names(cons_coeffs)] = cons_coeffs
v[names(cons_ps)] = cons_ps
v[names(cons_tstats)] = cons_tstats
return(data.frame(t(v)))
}
library(multcomp)
compute_lm_dea<-function(x,g){
glevels = sort(unique(g))
missing_groups = setdiff(c("1w","2w","4w","8w"),glevels)
df = data.frame(x=x,g=factor(g,levels = glevels))
m0 = lm(x~1,data=df)
m1 = lm(x~1+g,data=df)
training_p = anova(m0,m1)$`Pr(>F)`[2]
m = lm(x~0+g,data=df)
K = diag(length(glevels)-1)
K = cbind(K,-1)
rownames(K) = glevels[1:nrow(K)]
pairwise_res = summary(glht(m, linfct = K))$test
names(pairwise_res$coefficients) = paste0("beta_",names(pairwise_res$coefficients))
names(pairwise_res$pvalues) = paste0("pvalue_",names(pairwise_res$tstat))
names(pairwise_res$tstat) = paste0("tscore_",names(pairwise_res$tstat))
if(length(missing_groups)>0){
pairwise_res$coefficients[paste0("beta_",missing_groups)]=NA
pairwise_res$pvalues[paste0("pvalue_",missing_groups)]=NA
pairwise_res$tstat[paste0("tscore_",missing_groups)]=NA
}
v = c(
training_p = training_p,
pairwise_res$coefficients,
pairwise_res$tstat,
pairwise_res$pvalues
)
return(data.frame(t(v),check.names = F))
}
### Stat computations
mt_biomarker_correlations = list()
mt_biomarker_dea = c()
mt_biomarker_dea_lm = c()
for(tissue_name in names(mt_biomarkers)){
tissue_df = mt_biomarkers[[tissue_name]]
for(s in c("female","male")){
x = tissue_df[tissue_df$sex==s,]
biomarkers = setdiff(names(tissue_df),c("viallabel","bid","group","sex"))
biomarkers = biomarkers[!apply(is.na(tissue_df[,biomarkers]),2,all)]
biomarkers = biomarkers[!grepl("clPC",biomarkers)]
for(biomarker in biomarkers){
currx = x[,biomarker]
currg = x[,"group"]
if(all(is.na(currx))){next}
inds=!is.na(currx)
currx = currx[inds];currg=currg[inds]
dea_res = compute_biomarker_dea_res(currx,currg)
dea_res$tissue = tissue_name
dea_res$sex = s
dea_res$biomarker = biomarker
mt_biomarker_dea = rbind(mt_biomarker_dea,dea_res)
dea_res = compute_lm_dea(currx,currg)
dea_res$tissue = tissue_name
dea_res$sex = s
dea_res$biomarker = biomarker
if(length(mt_biomarker_dea_lm)==0){
mt_biomarker_dea_lm = rbind(mt_biomarker_dea_lm,dea_res)
}else{
mt_biomarker_dea_lm = rbind(mt_biomarker_dea_lm,
dea_res[,colnames(mt_biomarker_dea_lm)])
}
}
}
biomarkers = setdiff(names(tissue_df),c("viallabel","bid","group","sex"))
biomarkers = biomarkers[!apply(is.na(tissue_df[,biomarkers]),2,all)]
biomarkers = biomarkers[!grepl("clPC",biomarkers)]
if(length(biomarkers)>1){
biomarkers = sort(biomarkers)
curr_corrs = get_corrmat(tissue_df[,biomarkers])
mt_biomarker_correlations[[tissue_name]] = curr_corrs
}
}
write.csv(
mt_biomarker_dea,
file = paste0(output_dir,"mt_biomarker_dea_ttest_pw.csv"),
row.names = F,quote = F)
write.csv(
mt_biomarker_dea_lm,
file = paste0(output_dir,"mt_biomarker_dea_lm.csv"),
row.names = F,quote = F)
save(mt_biomarkers,
mt_biomarker_correlations,
mt_biomarker_dea,
mt_biomarker_dea_lm,
file=paste0(data_dir,"mt_biomarkers.RData"))
Load the data instead of recomputing:
load(paste0(data_dir,"mt_biomarkers.RData"))
biomarkers = setdiff(names(mt_biomarkers[[1]]),c("viallabel","bid","group","sex"))
Fold changes and p-values:
mt_biomarker_dea[mt_biomarker_dea$tissue=="ADRNL" & mt_biomarker_dea$biomarker=="mt_reads",]
## kw_p fc_1w fc_2w fc_4w fc_8w pval_1w pval_2w pval_4w
## 13 0.24568191 -4.7687 -4.4532 -4.9286 -6.4990 0.18497395 0.1911021 0.1312622
## 15 0.01012549 9.6878 4.2518 2.4568 2.7152 0.00253226 0.2622035 0.7021059
## pval_8w tstat_1w tstat_2w tstat_4w tstat_8w tissue sex biomarker
## 13 0.03345132 -1.98126 -1.962411 -2.171908 -2.863943 ADRNL female mt_reads
## 15 0.62980458 4.01898 1.763857 1.019202 1.126400 ADRNL male mt_reads
mt_biomarker_dea_lm[mt_biomarker_dea_lm$tissue=="ADRNL" & mt_biomarker_dea_lm$biomarker=="mt_reads",]
## training_p beta_1w beta_2w beta_4w beta_8w tscore_1w tscore_2w tscore_4w
## 13 0.095783716 -4.7687 -4.4532 -4.9286 -6.4990 -1.98126 -1.962411 -2.171908
## 15 0.009361876 9.6878 4.2518 2.4568 2.7152 4.01898 1.763857 1.019202
## tscore_8w pvalue_1w pvalue_2w pvalue_4w pvalue_8w tissue sex biomarker
## 13 -2.863943 0.184568962 0.1908302 0.1310538 0.03331909 ADRNL female mt_reads
## 15 1.126400 0.002262956 0.2619603 0.7020763 0.62974553 ADRNL male mt_reads
Simple boxplots
df = mt_biomarkers$ADRNL
df$group = factor(df$group,levels = c("control","1w","2w","4w","8w"))
par(mar=c(5,4,1,1),mfrow=c(1,2))
boxplot(mt_reads~group,data=df[df$sex=="male",],las=2,xlab="",
main="Mito reads, males",ylab="Percent reads")
boxplot(mt_reads~group,data=df[df$sex=="female",],las=2,xlab="",
main="Mito reads, females",ylab="Percent reads")
symargs = list(cutpoints = c(0, 0.0001, 0.001, 0.01, 0.05, 1),
symbols = c("****", "***", "**", "*", ""))
for(tissue in names(mt_biomarkers)){
df = mt_biomarkers[[tissue]]
df$group = factor(df$group,levels = c("control","1w","2w","4w","8w"))
if(tissue %in% c("OVARY","TESTES","VENACAVA")){next}
num_plots = 1 # for reads
if(!all(is.na(df$mtdna))){num_plots=num_plots+1}
df$CL_72_8 = df$`CL(72:8)>CL(18:2/18:2/18:2/18:2)`
if(all(is.na(df$CL_72_8))){df$CL_72_8=df$`CL(72:8)`}
if(!all(is.na(df$CL_72_8))){num_plots=num_plots+1}
curr_col = TISSUE_COLORS[tissue]
if(tissue == "SKMGN"){curr_col = TISSUE_COLORS["SKM-GN"]}
if(tissue == "SKMVL"){curr_col = TISSUE_COLORS["SKM-VL"]}
if(tissue == "WATSC"){curr_col = TISSUE_COLORS["WAT-SC"]}
# par(mar=c(5,4,1,1),mfrow=c(num_plots,2))
# boxplot(mt_reads~group,data=df[df$sex=="male",],las=2,xlab="",
# main=paste(tissue,"mito reads, males",sep=","),ylab="Percent reads",col=curr_col)
# boxplot(mt_reads~group,data=df[df$sex=="female",],las=2,xlab="",
# main=paste(tissue,"mito reads, females",sep=","),ylab="Percent reads",col=curr_col)
currplots = list()
curr_range = c(min(df$mt_reads,na.rm=T),max(df$mt_reads,na.rm=T))
new_max = curr_range[2] + 0.15 *(curr_range[2]-curr_range[1])
new_max2 = curr_range[2] + 0.3 *(curr_range[2]-curr_range[1])
p = ggboxplot(df,x="group",y="mt_reads",fill = curr_col,facet.by = "sex",notch = F) +
stat_compare_means(method = "anova", label.y = new_max,size=5, label.x = 2)+ # Add global p-value
stat_compare_means(label = "p.signif", method = "t.test",
ref.group = "control",symnum.args=symargs,size=6) + ylab("Percent reads") + xlab("") +
ggtitle(paste(tissue,"mito reads")) +
theme(strip.text.x = element_text(size = 15),plot.title = element_text(size=18,hjust=0.5))
p = ggpar(p,ylim = c(curr_range[1],new_max2))
currplots[["pct"]] = p
#print(p)
if(!all(is.na(df$mtdna))){
# boxplot(mtdna~group,data=df[df$sex=="male",],las=2,xlab="",
# main=paste(tissue,"mtDNA, males",sep=","),ylab="mtDNA level",col=curr_col)
# boxplot(mtdna~group,data=df[df$sex=="female",],las=2,xlab="",
# main=paste(tissue,"mtDNA, females",sep=","),ylab="mtDNA level",col=curr_col)
curr_range = c(min(df$mtdna,na.rm=T),max(df$mtdna,na.rm=T))
new_max = curr_range[2] + 0.15 *(curr_range[2]-curr_range[1])
new_max2 = curr_range[2] + 0.3 *(curr_range[2]-curr_range[1])
p = ggboxplot(df,x="group",y="mtdna",fill = curr_col,facet.by = "sex",notch = F) +
stat_compare_means(method = "anova", label.y = new_max, size=5,label.x = 2)+ # Add global p-value
stat_compare_means(label = "p.signif", method = "t.test",
ref.group = "control",symnum.args=symargs,size=6) + ylab("mtDNA level") + xlab("") +
ggtitle(paste(tissue,"mtDNA")) +
theme(strip.text.x = element_text(size = 15),plot.title = element_text(size=18,hjust=0.5))
p = ggpar(p,ylim = c(curr_range[1],new_max2))
currplots[["mtdna"]] = p
#print(p)
}
if(!all(is.na(df$CL_72_8))){
# boxplot(CL_72_8~group,data=df[df$sex=="male",],las=2,xlab="",
# main=paste(tissue,"CL, males",sep=","),ylab="log2 intensity",col=curr_col)
# boxplot(CL_72_8~group,data=df[df$sex=="female",],las=2,xlab="",
# main=paste(tissue,"CL, females",sep=","),ylab="log2 intensity",col=curr_col)
curr_range = c(min(df$CL_72_8,na.rm=T),max(df$CL_72_8,na.rm=T))
new_max = curr_range[2] + 0.15 *(curr_range[2]-curr_range[1])
new_max2 = curr_range[2] + 0.3 *(curr_range[2]-curr_range[1])
p = ggboxplot(df,x="group",y="CL_72_8",fill = curr_col,facet.by = "sex",notch = F) +
stat_compare_means(method = "anova",label.y = new_max,size=5,label.x = 2)+ # Add global p-value
stat_compare_means(label = "p.signif", method = "t.test",
ref.group = "control",symnum.args=symargs,size=6) + ylab("log2 intensity") + xlab("") +
ggtitle(paste(tissue,"CL(72:8)")) +
theme(strip.text.x = element_text(size = 15),plot.title = element_text(size=18,hjust=0.5))
p = ggpar(p,ylim = c(curr_range[1],new_max2))
currplots[["cl"]] = p
#print(p)
}
do.call("grid.arrange", c(currplots, nrow = 1))
}
get_stat_matrix<-function(mt_biomarker_dea,stat_name,biomarkers,tissues){
m = matrix(NA,nrow=length(tissues),ncol=2*length(biomarkers),
dimnames = list(tissues,c(paste0(biomarkers,"-male"),paste0(biomarkers,"-female"))))
for(tissue in tissues){
for(biomarker in biomarkers){
for(sex in c("male","female")){
curr_ind = mt_biomarker_dea$tissue==tissue & mt_biomarker_dea$sex==sex &
mt_biomarker_dea$biomarker==biomarker
if(sum(curr_ind)!=1){next}
m[tissue,paste(biomarker,sex,sep="-")] = mt_biomarker_dea[curr_ind,stat_name]
}
}
}
return(m)
}
mtdna_pctreads = sapply(mt_biomarker_correlations,function(x)x["mtdna","mt_reads"])
mtdna_pctreads = sort(mtdna_pctreads,decreasing = T)
names(mtdna_pctreads)[names(mtdna_pctreads)=="SKMGN"] = "SKM-GN"
names(mtdna_pctreads)[names(mtdna_pctreads)=="SKMVL"] = "SKM-VL"
names(mtdna_pctreads)[names(mtdna_pctreads)=="WATSC"] = "WAT-SC"
barplot(mtdna_pctreads,las=2,col=TISSUE_COLORS[names(mtdna_pctreads)],
ylab = "Pearson correlation",cex.axis = 1.2,space = 0)
abline(h=0.5,lty=2,lwd=2)
tissues_with_cls = names(mt_biomarker_correlations)[sapply(mt_biomarker_correlations,nrow)>2]
corrgplots = list()
for(tissue_name in tissues_with_cls){
m = mt_biomarker_correlations[[tissue_name]]
rownames(m) = sapply(rownames(m),function(x)strsplit(x,split=">")[[1]][1])
colnames(m) = rownames(m)
m_bin = abs(m) < 0.5
mode(m_bin) = "numeric"
p = ggcorrplot(m,p.mat=m_bin,title = tissue_name,method = "circle")
corrgplots[[tissue_name]] = p
}
do.call("grid.arrange", c(corrgplots[c("WATSC","LUNG")], ncol = 1))
sapply(corrgplots[c("LIVER","HEART","KIDNEY","SKMGN")],print)
## LIVER HEART KIDNEY SKMGN
## data List,7 List,7 List,7 List,7
## layers List,2 List,2 List,2 List,2
## scales ? ? ? ?
## mapping List,3 List,3 List,3 List,3
## theme List,93 List,93 List,93 List,93
## coordinates ? ? ? ?
## facet ? ? ? ?
## plot_env ? ? ? ?
## labels List,5 List,5 List,5 List,5
## guides List,1 List,1 List,1 List,1
tissues = sort(unique(mt_biomarker_dea$tissue))
cls = biomarkers[grepl("CL",biomarkers)]
non_cls = setdiff(biomarkers,cls)
tissues = setdiff(tissues,c("OVARY","TESTES"))
mt_biomarker_dea$biomarker[mt_biomarker_dea$biomarker == "CL(72:8)>CL(18:2/18:2/18:2/18:2)"] = "CL(72:8)"
sort(table(mt_biomarker_dea$biomarker))
##
## CL(70:4)
## 2
## CL(70:5)
## 2
## CL(70:7)>CL(16:1_18:2_18:2_18:2)
## 2
## CL(72:10)
## 2
## CL(72:9)
## 2
## CL(74:10)
## 2
## CL(74:10)>CL(18:2_18:2_18:2_20:4)
## 2
## CL(74:10)A
## 2
## CL(74:7)
## 2
## CL(74:8)
## 2
## CL(74:8)>CL(18:1_18:2_18:2_20:3)_and_CL(18:2_18:2_18:2_20:2)
## 2
## CL(74:8)>CL(18:2_18:2_18:2_20:2)_and_CL(18:1_18:2_18:2_20:3)
## 2
## CL(74:9)>CL(18:1_18:2_18:2_20:4)_and_CL(18:2_18:2_18:2_20:3)
## 2
## CL(76:11)
## 2
## CL(76:11)>CL(18:1_18:2_18:2_22:6)
## 2
## CL(76:13)
## 2
## CL(70:6)
## 4
## CL(72:6)
## 4
## CL(74:11)>CL(18:2_18:2_18:2_20:5)
## 4
## CL(70:7)
## 6
## CL(72:6)>CL(18:1_18:1_18:2_18:2)
## 6
## CL(72:7)
## 6
## CL(72:7)>CL(18:1_18:2_18:2_18:2)
## 6
## CL(74:10)>CL(18:1_18:2_18:2_20:5)
## 6
## CL(76:12)
## 6
## CL(74:11)
## 8
## CL(74:9)
## 8
## CL(72:8)
## 12
## mtdna
## 28
## mt_reads
## 34
biomarkers = c("mtdna","mt_reads","CL(74:9)","CL(74:11)","CL(72:8)")
mt_group_kw = get_stat_matrix(mt_biomarker_dea,"kw_p",biomarkers,tissues)
corrplot(t(-log10(mt_group_kw)),is.corr = F,method = "circle",
p.mat = t(mt_group_kw),sig.level = 0.05,tl.col="black",col=rev(COL2('RdBu', 200)))
par(mar=c(8,8,8,8))
tissue_counts_kw = colSums(mt_group_kw<0.05,na.rm=T)
barplot(sort(tissue_counts_kw,decreasing = T),las=2,col="white",
ylab = "Num tissues with p<0.05")
fc8w = get_stat_matrix(mt_biomarker_dea,"fc_8w",biomarkers,tissues)
p8w = get_stat_matrix(mt_biomarker_dea,"pval_8w",biomarkers,tissues)
tstats = get_stat_matrix(mt_biomarker_dea,"tstat_8w",biomarkers,tissues)
corrplot(t(tstats),method = "circle",is.corr = F,p.mat = t(p8w),sig.level = 0.05,
tl.col="black",col=rev(COL2('RdBu', 200)))
par(mar=c(8,8,8,8))
tissue_counts_8w = colSums(p8w<0.05,na.rm=T)
barplot(sort(tissue_counts_8w,decreasing = T),las=2,col="white",ylab = "Num tissues with p<0.05")
fc1w = get_stat_matrix(mt_biomarker_dea,"fc_1w",biomarkers,tissues)
p1w = get_stat_matrix(mt_biomarker_dea,"pval_1w",biomarkers,tissues)
tstats1w = get_stat_matrix(mt_biomarker_dea,"tstat_1w",biomarkers,tissues)
corrplot(t(tstats1w),method = "circle",is.corr = F,p.mat = t(p1w),sig.level = 0.05,
tl.col="black",col=rev(COL2('RdBu', 200)))
par(mar=c(8,8,8,8))
tissue_counts_1w = colSums(p1w<0.05,na.rm=T)
barplot(sort(tissue_counts_1w,decreasing = T),las=2,col="white",
ylab = "Num tissues with p<0.05")